


* ------------------------------------------------------------------------------
* Session setup: start from an empty session, switch off output paging, raise
* the variable limit, and define the folder shortcuts used by the script.
* NOTE(review): all folders are machine-specific absolute paths; they have to be
* edited before the script can run on another computer.
* ------------------------------------------------------------------------------
clear all

set more off
set maxvar 32767

* Folder shortcuts (referenced later as $afrobar, $aiddata, $temp)
global afrobar      "C:\Users\xisaka\ShareFile\Personal Folders\Afrobarometer"
global aiddata      "C:\Users\xisaka\ShareFile\Personal Folders\AidData"
global china_ethnic "C:\Users\xisaka\Dropbox\Ias\MINA PAPERS\chinese aid and ethnic sentiments\stata"
global temp         "C:\Users\xisaka\ShareFile\Personal Folders\temp data"





/***************CLEANING THE WORLD BANK AID DATA *******************************/
/*******************************************************************************/





* Make project_id numeric in each of the three raw World Bank files before they
* are merged: the letter P is stripped by destring, the file is sorted on
* project_id and then saved back over itself.
* NOTE(review): the raw files are overwritten in place.
* (An earlier version renamed projectid to project_id in the ancillary file;
*  that rename is no longer applied.)
foreach part in projects locations ancillary {
	use "\\home.gu.gu.se\home-XI$\xisaka\Documents\DATA\WB geocoded aid\WBaid_1.3_`part'.dta", clear
	destring project_id, ignore ("P") replace
	sort project_id
	save "\\home.gu.gu.se\home-XI$\xisaka\Documents\DATA\WB geocoded aid\WBaid_1.3_`part'.dta", replace
}


* ------------------------------------------------------------------------------
* Merge the three World Bank aid files (projects, locations, ancillary) on
* project_id, restrict the result to African project locations with precise
* geocodes, derive start/end years, and save the cleaned file to the AidData
* folder.
*
* NOTE(review): both merges use the old merge syntax (merge varlist using ...).
* That syntax relies on master and using data being sorted on the key (hence the
* sort lines) and does not check whether project_id is unique on either side.
* Presumably projects -> locations is one-to-many and ancillary is one row per
* project - confirm before converting to merge 1:m / m:1.
* ------------------------------------------------------------------------------

use "\\home.gu.gu.se\home-XI$\xisaka\Documents\DATA\WB geocoded aid\WBaid_1.3_projects.dta", clear

* Step 1: add the location records to the project records
merge project_id using "\\home.gu.gu.se\home-XI$\xisaka\Documents\DATA\WB geocoded aid\WBaid_1.3_locations.dta"
tab _merge					
drop _merge
* re-sort: the old merge syntax needs the master sorted on the key for the next merge
sort project_id

* Step 2: add the ancillary project information
merge project_id using "\\home.gu.gu.se\home-XI$\xisaka\Documents\DATA\WB geocoded aid\WBaid_1.3_ancillary.dta"
tab _merge			
drop _merge
sort project_id

 
* Make project_location_id numeric: the characters P, comma, blank and
* underscore are removed before conversion, so the digits on both sides of the
* underscore are concatenated into a single number.
destring project_location_id, ignore ("P, _") replace

* Keep only rows whose region is AFRICA
keep if region=="AFRICA"
 
 
* Keep observations with more exact geocodes: precision_code 1 or 2.
* Rows with a missing precision_code are dropped as well (missing is not < 3).
  tab precision_code
 keep if precision_code<3
 

 
 * Drop projects without a start date.
 * (Author's note: no observations were deleted, all had a start date.)
 drop if start_actual_isodate==""

 
 * Extract the year from the start and end dates: the first four characters of
 * the date string. Both new variables are strings at this point.
 
 gen start_year	=substr(start_actual_isodate, 1,4)			
 gen end_year	=substr(end_actual_isodate, 1,4)
 

 

			   
* Save the cleaned project-location file; it is used later both for the
* geomatching and for looking up project details.
save "$aiddata\WorldBank_project_locations_clean.dta", replace
		

	 
	 
	 
 

 
 
 
 
 
****************************************GEOMATCHING THE AID DATA AND THE AFROBAROMETER DATA****************************************
***********************************************************************************************************************************

 


* ------------------------------------------------------------------------------
* nearstat: World Bank projects (all flows) vs. the new Afrobarometer file
* (precision codes 1, 2, 3)
*
* For every Afrobarometer location (g_lat, g_lon) this section counts the aid
* projects inside several distance bands and then records, row-wise, the id of
* and distance to the 1st, 2nd, ... k-th nearest project.
* NOTE(review): nearstat is a user-written command; distances are presumably in
* kilometres (its default) - confirm against the nearstat help file.
* ------------------------------------------------------------------------------

* Start from the collapsed Afrobarometer file
use "$afrobar/Afro_collapsed_precisioncode123_replication.dta", clear

* Stack the aid project locations underneath; only id and coordinates are kept
append using  "$aiddata\WorldBank_project_locations_clean.dta", keep(project_location_id  latitude longitude)

* Put the project rows first so that nearstat will run: missing values sort
* last, and only project rows have a non-missing latitude.
* (Sorting on project_location_id was tried earlier and did not put the
*  projects first.)
gsort latitude

* Step 1: count the projects inside each distance band. The distance variable
* nearstat creates along the way is not needed, so it is removed right away.
set more off
foreach band in 10 25 50 75 100 200 {
	nearstat g_lat g_lon, ///
		near(latitude longitude) distvar(_1projectdist`band') ///
		ncount(numberwithin`band') dband(0 `band') favor(speed)
	drop _1projectdist`band'
}

* Step 2: one nearstat pass per neighbour rank k, up to the largest number of
* projects any location has within the 200 band. Pass k only runs for rows
* that have at least k projects within that band.
summarize numberwithin200
local max_near = r(max)
forvalues k = 1/`max_near' {
	nearstat g_lat g_lon if `k'<=numberwithin200, ///
		near(latitude longitude) distvar(_`k'projectdist) nid(project_location_id _`k'closestproject) ///
		kth(`k') favor(speed)
}

set more off

* Step 3: locations without any project inside the 200 band still get their
* single closest project, whatever the distance.
nearstat g_lat g_lon if numberwithin200==0, ///
	near(latitude longitude) distvar(not200_projectdist) nid(project_location_id not200_closestproject) ///
	kth(1) favor(speed)

* Checkpoint in case something goes wrong further down
save "$temp/temp_aid.dta", replace

* Copy the fallback neighbour into the rank-1 slot
replace _1closestproject = not200_closestproject if numberwithin200==0
replace _1projectdist    = not200_projectdist    if numberwithin200==0

* The projects are now linked to the locations row-wise, so the project rows
* themselves (non-missing latitude) and any row without Afrobarometer
* coordinates can go, followed by the project-only columns.
drop if !missing(latitude) | missing(g_lat)
drop project_location_id latitude longitude

* Shrink storage types to reduce memory use
compress





************************* HERE (author's working bookmark; originally Swedish "HÄR") *************************













** Attach project details to every neighbour slot.
* For each rank k (1 up to the largest number of projects within the 200 band)
* the id stored in _kclosestproject is looked up in the cleaned project file,
* and the project id, start year and end year are stored as _kproject_id,
* _kstartyear and _kendyear.
* Rows of the project file that match no location are not kept.
* Note: recipients is never renamed, so from the second pass onwards the copy
* already in memory is retained, i.e. it describes the rank-1 project only.
set more off
summarize numberwithin200
local nslots = r(max)
forvalues k = 1/`nslots' {
	* The lookup key has to carry the same name as in the project file
	rename _`k'closestproject project_location_id
	merge m:1 project_location_id using "$aiddata\WorldBank_project_locations_clean.dta", ///
		keepusing(project_id start_year end_year recipients) keep(master match) nogenerate
	* Move the looked-up values into the slot-specific names and restore the key
	rename (project_id start_year end_year project_location_id) ///
		(_`k'project_id _`k'startyear _`k'endyear _`k'closestproject)
}

compress




** Merge back with the pre-collapse (respondent-level) Afrobarometer data.
* The match is on coordinates only (survey wave is not part of the key), so all
* respondents with the same coordinates get exactly the same nearby projects.
* Respondent rows whose coordinates are absent from the data in memory are not
* kept.
set more off
merge 1:m g_lat g_lon using "$afrobar/Afro_tot_precisioncode123_replication.dta", ///
	keep(master match) nogenerate

* Checkpoint: respondent-level file with the row-wise project links
save "$temp/temp_aidC.dta", replace





* ==============================================================================
* Treatment indicators, auxiliary variables and estimation samples
*
* Input : temp_aidC.dta in the temp folder - respondent-level data in which the
*         k-th nearest project is stored row-wise as _kprojectdist, _kstartyear
*         and _kendyear, together with the band counts numberwithin10 ... 200.
* Output: temp_aid2C.dta in the temp folder (checkpoint after the indicator
*         loop) and the final replication file in the AidData folder.
*
* Fix relative to the earlier version: the 10 band is now part of the indicator
* loop and of the renames. Previously ongoing10 and future10 were used for the
* sensitivity variables further down without ever being created, which stopped
* the script with a variable-not-found error.
* ==============================================================================
use "$temp/temp_aidC.dta", clear

* The year variables were cut out of date strings, so they are still strings
destring *startyear, replace
destring *endyear, replace

* ------------------------------------------------------------------------------
* Project status within each distance band, relative to the interview year
*   active    : at least one project within the band started in or before the
*               interview year (the end year is not checked for this flag)
*   suspended : at least one project within the band ended before the interview
*               year, and none is still running
*   inactive  : at least one project within the band starts after the interview
*               year, and the area is neither active nor suspended
* NOTE(review): a missing int_year compares as larger than any year, so such
* respondents would be coded active whenever a project lies within the band -
* confirm that int_year is never missing.
* ------------------------------------------------------------------------------
set more off
foreach dist in 10 25 50 75 100 200 {
	gen byte active`dist' = .
	gen byte suspended`dist' = .
	gen byte inactive`dist' = .
	gen byte activecodehelp`dist' = .
	label variable active`dist' "Active if any project is active"
	label variable suspended`dist' "Suspended if all projects have ended"
	label variable inactive`dist' "Inactive if a project will start, conditional on no projects before"

	* Only neighbour slots up to the largest count within this band can hold a
	* project that lies inside the band
	sum numberwithin`dist'
	local nslots = r(max)
	forvalues i = 1/`nslots' {
		* Helper flag: project has started and its end year is after the
		* interview year (a missing end year counts as not yet ended)
		replace activecodehelp`dist' = 1 if _`i'startyear<=int_year & _`i'projectdist <= `dist' & _`i'endyear>int_year
		* Resp active if any project within X km started in or before the interview year
		replace active`dist' = 1 if _`i'startyear<=int_year & _`i'projectdist <= `dist' 
		* Resp suspended if any project within X km ended before the interview year
		replace suspended`dist' = 1 if _`i'endyear<int_year & _`i'endyear<.  & _`i'projectdist <= `dist' 
		* But not if at least one project is still running within X km
		replace suspended`dist' = 0 if activecodehelp`dist'==1
		* Resp inactive if any project within X km starts after the interview year
		replace inactive`dist' = 1 if _`i'startyear>int_year & _`i'projectdist <= `dist'
		* But not if at least one project is either active or suspended within X km
		* Area considered "contaminated"
		replace inactive`dist' = 0 if active`dist'==1 | suspended`dist'==1
	}

	* Resp not active/inactive/suspended if never replaced in the loop above
	replace active`dist'=0 if missing(active`dist')
	replace inactive`dist'=0 if missing(inactive`dist')
	replace suspended`dist'=0 if missing(suspended`dist')
	noisily di as text "Calculated distance " as result `dist'
}


compress
save "$temp/temp_aid2C.dta", replace		//large file



************generating some further variables

*generate country and year dummies (and region dummies if we want to use that)
tab int_year, gen(yd)
tab country, gen(cd) 
tab region, gen (rd)

* Distance to the closest project
gen distance=_1projectdist

* Final names of the status indicators:
*   active -> ongoing, inactive -> future, suspended -> completed
foreach dist in 10 25 50 75 100 200 {
	rename active`dist'    ongoing`dist'
	rename inactive`dist'  future`dist'
	rename suspended`dist' completed`dist'
}

*generating some additional variables and interactions
foreach dist in 50 25 {
	gen preseth_ongoing`dist'=presethnic*ongoing`dist'
	gen preseth_future`dist'=presethnic*future`dist'
}

* Any project at all within the band, regardless of timing
foreach dist in 50 75 25 {
	gen anyproject`dist' =cond(numberwithin`dist'>0 ,1,0) if numberwithin`dist'!=.
}

*linear time trend variable: interview years 2002 ... 2015 become 1 ... 14,
*any other value of int_year is left unchanged
gen timetrend=int_year
recode timetrend (2002=1) (2003=2) (2004=3) (2005=4) (2006=5) (2007=6) (2008=7) ///
	(2009=8) (2010=9) (2011=10) (2012=11) (2013=12) (2014=13) (2015=14)


*make coefplots for sensitivity analysis
foreach dist in 25 10 50 75 {
	gen sum_ongoingfuture`dist' = ongoing`dist'+future`dist'
}
foreach dist in 25 10 50 75 {
	gen diffhelp`dist' = ongoing`dist'
}


* ------------------------------------------------------------------------------
* Estimation samples (1 = in sample, 0 = otherwise)
* NOTE(review): ongoing25 and future25 are set to 0 whenever they were missing,
* so the two conditions on them below are always true.
* ------------------------------------------------------------------------------

*benchmark sample: 13 countries
*also excluding zimbabwe togo tanzania namibia cape verde
gen	WBethnicestsample	=cond(((country=="Benin" | country=="Botswana" | country=="Ghana" | country=="Kenya" | country=="Liberia" | country=="Madagascar" | country=="Malawi" | country=="Mali" 	///
| country=="Mozambique" | country=="Nigeria" | country=="Senegal" | country=="South Africa" | country=="Uganda")  					///
 & ethnicidentity!=.	&  unfairlytreated!=.  & presethnic!=. & ongoing25!=. & future25!=.),1,0) 	

*extended wb sample: on top of the 13 benchmark countries, also including
*Cape Verde, Tanzania, Togo and Zimbabwe (Namibia stays excluded);
*unfairlytreated is not required to be non-missing here
 gen 	WBextended	=cond(((country=="Benin" | country=="Botswana" | country=="Cape Verde" | country=="Ghana" | country=="Kenya" | country=="Liberia" | country=="Madagascar" | country=="Malawi" | country=="Mali" 	///
| country=="Mozambique" | country=="Nigeria" | country=="Senegal" | country=="South Africa" | country=="Tanzania" | country=="Togo" | country=="Uganda" | country=="Zimbabwe")  					///
 & ethnicidentity!=.	  & presethnic!=. & ongoing25!=. & future25!=.),1,0) 	

 *wb exclude also those not part of chinese (exclude ghana, mozambique, south africa, uganda)
 gen	WBsmaller	=cond(((country=="Benin" | country=="Botswana"  | country=="Kenya" | country=="Liberia" | country=="Madagascar" | country=="Malawi" | country=="Mali" 	| country=="Nigeria" | country=="Senegal" )  					///
 & ethnicidentity!=.	&  unfairlytreated!=.  & presethnic!=. & ongoing25!=. & future25!=.),1,0) 	


compress
save "$aiddata/WB all projects_afro precision code 1 2 3 data_replication.dta", replace

			
			
			
			
			
			
			
			
			
			
			
/*the end*/

